library(here)
library(tidyverse)
source(here("utils.R"))

Anonymize data

# Path to the raw, anonymized results of the blocksworld-prior experiment.
# (Removed stray trailing semicolon — not idiomatic R.)
data_path <- here("experiments", "blocksworld-prior", "results", "data-raw",
                  "results_anonymized_12_blocksworld-prior_BG.csv")
# preprocess_data() is defined in utils.R (sourced above).
data <- preprocess_data(data_path)
## [1] "read data from: /home/britta/UNI/Osnabrueck/conditionals-blocksworld/experiments/blocksworld-prior/results/data-raw/results_anonymized_12_blocksworld-prior_BG.csv"
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   submission_id = col_double(),
##   RT = col_double(),
##   age = col_double(),
##   endTime = col_double(),
##   experiment_id = col_double(),
##   question = col_logical(),
##   startTime = col_double(),
##   timeSpent = col_double(),
##   trial_number = col_double()
## )
## See spec(...) for full column specifications.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1450
## rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
## 20, ...].
# discard training trials
data <- data %>% filter(trial_name == "slider_main")
nrow(data)
## [1] 5000
# All stimulus ids occurring in the experimental trials.
STIMULI <- data %>% ungroup() %>% pull(stimulus_id) %>% unique()
TARGET_DIR <- here("experiments", "blocksworld-prior", "results", "plots")
dir.create(TARGET_DIR, recursive = TRUE, showWarnings = FALSE)

Look at some statistics

Duration, age, gender etc.

# Duration: distribution of total time spent per participant, with
# reference lines at mean + 1 SD and mean + 2 SD.
df <- data %>% ungroup() %>% select(participant_id, timeSpent, age, gender) %>%
  group_by(participant_id) %>%
  distinct()

# outlier.shape = NA suppresses the boxplot's own outlier points, which were
# otherwise drawn a second time by geom_jitter (fixes "points added twice").
p <- df %>%
  ggplot(aes(x = factor(0), y = timeSpent)) +
  geom_boxplot(outlier.shape = NA)
# Fixed colors must be SET outside aes(); inside aes() the strings "yellow"
# and "red" are treated as data values and mapped to the default palette,
# so neither line was actually yellow/red before.
p + geom_jitter(shape = 16) +
  geom_hline(yintercept = mean(df$timeSpent) + sd(df$timeSpent),
             color = "yellow") +
  geom_hline(yintercept = mean(df$timeSpent) + 2 * sd(df$timeSpent),
             color = "red") +
  theme(legend.position = "none")

# Demographics / duration summary per participant.
# NOTE(review): group_by() has no effect here — base summary() ignores
# dplyr grouping; it could be dropped without changing the output.
df %>% ungroup() %>%  group_by(participant_id) %>%  summary()
##        participant_id   timeSpent           age           gender  
##  participant1 : 1     Min.   : 7.279   Min.   :19.00   female:26  
##  participant10: 1     1st Qu.:12.355   1st Qu.:27.00   male  :24  
##  participant11: 1     Median :14.458   Median :33.50              
##  participant12: 1     Mean   :17.000   Mean   :35.10              
##  participant13: 1     3rd Qu.:17.748   3rd Qu.:42.75              
##  participant14: 1     Max.   :57.989   Max.   :63.00              
##  (Other)      :44
# Reaction time per experimental trial, with reference lines at
# mean + 1 SD and mean + 2 SD of all RTs.
df <- data %>% ungroup() %>% select(participant_id, stimulus_id, RT) %>%
  filter(!is.na(stimulus_id) & !is.na(RT)) %>%
  group_by(stimulus_id) %>%
  distinct()
# Fixed reference-line colors belong OUTSIDE aes(); inside aes() they are
# mapped as data values and both lines received default palette colors.
p <- df %>% ggplot(aes(x = stimulus_id, y = RT)) + geom_boxplot() +
  geom_hline(yintercept = mean(df$RT) + sd(df$RT), color = "yellow") +
  geom_hline(yintercept = mean(df$RT) + 2 * sd(df$RT), color = "red") +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90)) +
  ggtitle("Reaction times per stimulus")
# file.path() is the idiomatic, portable way to join path components.
ggsave(file.path(TARGET_DIR, "Reaction-times.jpg"), p, width = 6, height = 4)
p

Look at comments

# Inspect all distinct free-text comments left by participants.
data %>% ungroup() %>% distinct(comments)

Discard data based on comments and reaction time

  • Discard trials where the reaction time was more than 3 minutes or less than 5 seconds (remember there were 4 sliders to move in one trial)

  • filter if something went wrong according to comments

# Drop the two trials one participant reported as broken in their comment.
# The is.na() guard keeps rows whose comments field is missing: with
# `comments == "..."` alone such rows evaluate to NA and filter() would
# silently discard them.
data_filtered <- data %>%
  filter(is.na(comments) |
           !(comments == "My final two questions, didn't have an example to show me." &
               trial_number %in% c(24, 25)))

nrow(data_filtered)
## [1] 4992
# TODO: something went wrong with reaction times! (only first trial is saved)
# Discard trials slower than 3 minutes or faster than 5 seconds
# (NA reaction times are kept).
data_filtered <- data_filtered %>% filter(is.na(RT) | RT <= 3 * 60 * 1000)
nrow(data_filtered)
## [1] 4982
data_filtered <- data_filtered %>% filter(is.na(RT) | RT > 5000)
nrow(data_filtered)
## [1] 4971

Process data

  1. Account for different color-groups. Select only the variables relevant for further analysis and write them to csv.
# Keep only the columns relevant for the remaining analysis.
df <- data_filtered %>%
  select(-utt_idx, -trial_name, -trial_number, -timeSpent, -gender, -age,
         -comments, -RT)

# Undo the color-group counterbalancing: map the on-screen colors (b/g)
# back to the abstract block labels a/c depending on the participant's
# color group; scale slider responses from [0, 100] to [0, 1].
data_processed <- df %>%
  group_by(participant_id, stimulus_id, color_group) %>%
  mutate(
    utterance = case_when(
      utterance == "bg"   ~ "ac",
      utterance == "none" ~ "none",
      color_group == "group1" & utterance == "b" ~ "a",
      color_group == "group1" & utterance == "g" ~ "c",
      color_group == "group2" & utterance == "b" ~ "c",
      color_group == "group2" & utterance == "g" ~ "a"
    ),
    utterance = factor(utterance, levels = c("a", "c", "ac", "none")),
    response = response / 100
  ) %>%
  ungroup() %>%
  select(-color_group)

Normalize data

  1. Normalize, such that all four responses (slider values) sum up to 1. There are some single trials where a participant rated all four events to have 0 probability which need to be excluded when normalized.
# Drop trials where a participant set all four sliders to 0 (these cannot
# be normalized), then rescale each trial's responses to sum to 1.
df <- data_processed %>%
  group_by(participant_id, stimulus_id) %>%
  filter(sum(response) != 0)
nrow(df)
## [1] 4946
data_normalized <- df %>%
  mutate(n = sum(response),
         response = response / n)

data_normalized
# Sanity check: normalization must not have produced any NA responses.
stopifnot(nrow(filter(data_normalized, is.na(response))) == 0)

Discard data based on minimal requirements

After normalizing, check whether minimal requirements are fulfilled.

If at least one block clearly falls/doesn’t fall (touch the ground), but participant put high probability on none (neither block touches the ground) or ac (both touch the ground), discard trial, in these cases participants cannot have been concentrated.

This doesn’t really feel good - in future, I might use control trials, e.g. after each fifth trial, a control trial (i.e. 5 in total), and if more than two of the control trials aren’t answered meaningfully, discard the entire data of that participant?! Or maybe it’s necessary to make the pictures clearer by adjusting the blocks’ positions for a high/low/uncertain prior to fall, to make them distinguishable more easily.

The following pictures show all scenes for which I specified such a requirement. Below are the stimulus ids of those scenes of which some trials had to be discarded.

“S1-121”

“S1-121”

“S10-203”

“S10-203”

“S12-203”

“S12-203”

“S15-443”

“S15-443”

“S20-468”

“S20-468”

“S22-468”

“S22-468”

“S30-805”

“S30-805”

“S32-806”

“S32-806”

“S7-130”

“S7-130”

“S7-130-dep”

“S7-130-dep”

# Minimal-requirement annotations per scene: for some stimuli an event is
# required not to be rated small (req.exp1.not.small) or not to be rated
# large (req.exp1.not.large). Keep only scenes with such a requirement.
fn <- "scenes_luh_annotations.csv"
min.require <- read_csv(here("experiments", "stimuli", fn)) %>%
  select(req.exp1.not.small, req.exp1.not.large, id) %>%
  filter(!is.na(req.exp1.not.small) | !is.na(req.exp1.not.large))

#' Collect trials of `stimulus` that violate its minimal requirement.
#'
#' Looks the stimulus up in the global `min.require` table; when a
#' requirement exists, returns the rows of `data_wide` violating it.
#'
#' @param data_wide normalized responses in wide format (one column per
#'   utterance: a, c, ac, none).
#' @param stimulus a single stimulus id.
#' @param low threshold below which a "not small" event counts as violated
#'   (default 0.25, the previously hard-coded value).
#' @param high threshold above which a "not large" event counts as violated
#'   (default 0.75, the previously hard-coded value).
#' @return the violating rows; an empty tibble if the stimulus has no
#'   requirement or nothing is violated.
check <- function(data_wide, stimulus, low = 0.25, high = 0.75){
  req <- min.require %>% filter(id == (!!stimulus))
  dat <- tibble()
  if (nrow(req) != 0) {
    # Column names (as strings) of the constrained events, or NA.
    not_small <- req$req.exp1.not.small
    not_large <- req$req.exp1.not.large

    dat <- data_wide %>% filter(stimulus_id == (!!stimulus))
    # An event that must not be small is violated when rated low.
    if (!is.na(not_small)) {
      dat <- dat %>% filter(!!sym(not_small) < low)
    }
    # An event that must not be large is violated when rated high.
    if (!is.na(not_large)) {
      dat <- dat %>% filter(!!sym(not_large) > high)
    }
  }
  return(dat)
}

# some trials have less than 4 utterances since some single responses were
# excluded, e.g. due to RT. With pivot_wider, we therefore get NA-values,
# which need to be filtered out again
data_normalized_wide <- data_normalized %>%
  pivot_wider(names_from = "utterance", values_from = "response") %>%
  filter(!is.na(ac) & !is.na(a) & !is.na(c) & !is.na(none))

# Collect all trials violating a minimal requirement. bind_rows over a
# list avoids growing the tibble inside a for loop (quadratic copying).
critical <- bind_rows(lapply(STIMULI, function(s) {
  check(data_normalized_wide, s)
}))

critical
critical %>% select(stimulus_id) %>% unique() %>% pull(stimulus_id)
## [1] S7-130-dep S32-806    S30-805   
## 25 Levels: S1-121 S10-203 S12-203 S15-443 S20-468 S22-468 ... S97-1674
# Remove the critical trials and return to long format.
data_normalized_wide <- anti_join(data_normalized_wide, critical)
data_normalized <- data_normalized_wide %>%
  pivot_longer(cols = c("a", "c", "ac", "none"), names_to = "utterance",
               values_to = "response")

# total and relative nb included responses
nrow(data_normalized)
## [1] 4856
nrow(data_normalized) / nrow(data)
## [1] 0.9712

Save data

# Write a table to the data-processed directory as comma-separated values.
write_processed <- function(tbl, filename) {
  path <- here("experiments", "blocksworld-prior", "results",
               "data-processed", filename)
  write.table(tbl, file = path, sep = ",", row.names = FALSE)
}

# Per-trial normalized responses.
write_processed(data_normalized, "tables_experimental.csv")

# Per-stimulus mean response for each utterance.
means <- data_normalized %>%
  group_by(stimulus_id, utterance) %>%
  summarise(mean = mean(response))
write_processed(means, "tables_experimental_means.csv")

Inspect processed data

# Look for participants who constantly gave all-or-none responses:
# n == 1 means the raw (pre-normalization) slider values already summed
# to 1, i.e. one slider at 100 and the other three at 0.
df_wide <- data_normalized %>%
  pivot_wider(names_from = utterance, names_prefix = "utt.",
              values_from = response) %>%
  ungroup() %>%
  filter(n == 1)

# How many all-or-none stimuli per participant?
df <- df_wide %>% count(participant_id, name = "n_all_none_stimuli")

df %>% ggplot(aes(n_all_none_stimuli)) + geom_histogram(binwidth = 1)

# which stimuli were rated as all-or-none by how many participants?
df <- df_wide %>% count(stimulus_id, name = "n_participants")

df %>% ggplot(aes(x = stimulus_id, y = n_participants)) + geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Plot the data

# Per-stimulus plots: violin + jitter of the four normalized responses,
# with the mean (red) and median (yellow) marked per utterance.
ids <- data_normalized %>% pull(stimulus_id) %>% unique()
data_normalized <- data_normalized %>%
  mutate(utterance = factor(utterance, levels = c("ac", "a", "c", "none")))

labels <- c(ac="Blue and Green", a="Blue and ¬Green", c="¬Blue and Green",
            none="¬Blue and ¬Green")
# Iterate over `ids` — the stimuli actually remaining after filtering —
# instead of the full STIMULI list; previously `ids` was computed but
# never used, and discarded stimuli produced empty plots.
for (s in ids) {
  df <- data_normalized %>% filter(stimulus_id == s)
  df_means <- df %>% group_by(utterance) %>%
    summarise(m = mean(response), med = median(response))

  p <- df %>%
    ggplot(aes(x = factor(0), y = response, fill = utterance)) +
    geom_violin(alpha = 0.5) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    geom_point(data = df_means, mapping = aes(x = factor(0), y = m),
               col = "red") +
    geom_point(data = df_means, mapping = aes(x = factor(0), y = med),
               col = "yellow") +
    coord_flip() +
    labs(y = "", x = "") +
    theme_classic() +
    facet_wrap(~utterance, labeller = labeller(utterance = labels)) +
    # ggtitle(s) +
    theme(legend.position = "none", axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          text = element_text(size = 20),
          panel.spacing = unit(2, "lines"))

  fn <- paste0("responses-", s, ".png")
  ggsave(file.path(TARGET_DIR, fn), p, width = 6, height = 4)
  print(p)
}

# Re-read the processed per-trial data for the probability analyses below.
fn <- here("experiments", "blocksworld-prior", "results", "data-processed",
           "tables_experimental.csv")
dat.experiment1 <- read_csv(fn)
## Parsed with column specification:
## cols(
##   participant_id = col_character(),
##   stimulus_id = col_character(),
##   n = col_double(),
##   utterance = col_character(),
##   response = col_double()
## )
# Wide format per trial, derive probability columns, then long over the
# p_* keys. NOTE(review): add_probs() comes from utils.R (sourced at the
# top); presumably it adds marginal/conditional probabilities p_a, p_c,
# p_a_given_c, ... — verify there.
dat <- dat.experiment1 %>% 
  pivot_wider(names_from = utterance, values_from = response) %>% 
  add_probs() %>% 
  pivot_longer(cols=starts_with("p_"), names_to = "key", values_to = "prob")

#' Overlayed density plot of two derived probabilities, saved to TARGET_DIR.
#'
#' @param dat long-format tibble with columns `key` and `prob`.
#' @param p1,p2 names of the two probability keys to compare.
#' @param save_as file name (inside TARGET_DIR) for the saved figure.
#' @return the ggplot object (so callers can print or modify it).
PlotDensity <- function(dat, p1, p2, save_as){
  p <- dat %>%
    # %in% is the idiomatic (and NA-safe) form of `key==p1 | key==p2`.
    filter(key %in% c(p1, p2)) %>%
    ggplot(aes(prob, fill = key)) +
    geom_density(alpha = 0.5)

  ggsave(file.path(TARGET_DIR, save_as), p, width = 6, height = 4)
  return(p)
}

# Compare the conditional and marginal probabilities (and their negations).
PlotDensity(dat, "p_a_given_c", "p_c_given_a", "p-conditionals.png")

PlotDensity(dat, "p_a", "p_c", "p-marginals.png")

PlotDensity(dat, "p_na_given_nc", "p_nc_given_na", "p-neg-conditionals.png")

PlotDensity(dat, "p_na", "p_nc", "p-neg-marginals.png")